library(data.table)
library(tidyverse)
## -- Attaching packages ------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------- tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the full weather table as a plain data.frame rather than a data.table.
weather <- fread("weatherAUS.csv", data.table = FALSE)

# Keep only the Sydney rows, parse Date, and turn RainTomorrow into a factor
# whose missing entries are filled in with "No".
weather.sydney <- weather %>%
  filter(Location == "Sydney") %>%
  mutate(
    Date = as.Date(Date),
    RainTomorrow = as.factor(RainTomorrow),
    RainTomorrow = replace(RainTomorrow, is.na(RainTomorrow), "No")
  )
# Distribution of Rainfall for Sydney, using bins 3 units wide.
ggplot(weather.sydney, aes(x = Rainfall)) +
  geom_histogram(binwidth = 3)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

# Count the Sydney rows whose Rainfall is exactly zero. Rows with a missing
# Rainfall are not counted here, but they still contribute to the denominator
# (all Sydney rows) in the percentage below.
norainy.day <- sum(weather.sydney$Rainfall == 0, na.rm = TRUE)

# Share of zero-rainfall rows, formatted as a percentage string.
paste0(norainy.day / nrow(weather.sydney) * 100, "%")
## [1] "60.5933473179503%"
# Relative frequency of each RainTomorrow level among the Sydney rows.
prop.table(table(weather.sydney[["RainTomorrow"]]))
##
## No Yes
## 0.7407851 0.2592149
# MaxTemp over time as a line with a smoothed trend on top.
interactive_plot <- weather.sydney %>%
  ggplot(aes(x = Date, y = MaxTemp)) +
  geom_line() +
  geom_smooth()

# Convert the static ggplot into an interactive plotly widget.
interactive_plot %>% ggplotly()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
# Violin plot of Humidity3pm, one violin per RainTomorrow level; outline and
# fill are both coloured by RainTomorrow.
ggplot(
  weather.sydney,
  aes(
    x = RainTomorrow,
    y = Humidity3pm,
    colour = RainTomorrow,
    fill = RainTomorrow
  )
) +
  geom_violin()
## Warning: Removed 13 rows containing non-finite values (stat_ydensity).

# Violin plot of Pressure3pm, one violin per RainTomorrow level; outline and
# fill are both coloured by RainTomorrow.
ggplot(
  weather.sydney,
  aes(
    x = RainTomorrow,
    y = Pressure3pm,
    colour = RainTomorrow,
    fill = RainTomorrow
  )
) +
  geom_violin()
## Warning: Removed 19 rows containing non-finite values (stat_ydensity).

# Violin plot of Rainfall, one violin per RainTomorrow level; outline and
# fill are both coloured by RainTomorrow.
ggplot(
  weather.sydney,
  aes(
    x = RainTomorrow,
    y = Rainfall,
    colour = RainTomorrow,
    fill = RainTomorrow
  )
) +
  geom_violin()
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).

# Gather the columns for a scatterplot matrix: every column whose name
# contains "Temp", "Speed", "Humidity" or "Pressure", plus Rainfall and
# RISK_MM.
# The pattern must be spelled "Speed": contains() does not error when a
# pattern matches nothing, so a misspelling silently drops those columns.
num_data <- weather.sydney %>%
  select(
    contains("Temp"), Rainfall, contains("Speed"),
    contains("Humidity"), contains("Pressure"), RISK_MM
  )

# Pairwise scatterplots of all selected columns.
pairs(num_data)

# Scatter of Rainfall against Humidity3pm with a fitted straight line
# (method = "lm").
ggplot(weather.sydney, aes(x = Humidity3pm, y = Rainfall)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

# Pair each row with the Rainfall value of the row that follows it.
# lead() shifts the column up by one position and pads the end with NA, so the
# new column always has the same length as the input (including zero rows).
# NOTE(review): "next row" only means "next day" if the rows are in date order
# with no missing days - confirm for this data set.
humid_Tomorrowrain <- weather.sydney %>%
  select(Date, Humidity3pm, RainToday, RainTomorrow, Rainfall) %>%
  mutate(Tomorrow.Rainfall = lead(Rainfall))

# Preview the first rows to check the shift visually.
head(humid_Tomorrowrain)
## Date Humidity3pm RainToday RainTomorrow Rainfall Tomorrow.Rainfall
## 1 2008-02-01 84 Yes Yes 15.6 6.0
## 2 2008-02-02 73 Yes Yes 6.0 6.6
## 3 2008-02-03 86 Yes Yes 6.6 18.8
## 4 2008-02-04 90 Yes Yes 18.8 77.4
## 5 2008-02-05 74 Yes Yes 77.4 1.6
## 6 2008-02-06 62 Yes Yes 1.6 6.2
# Scatter of the following row's Rainfall against Humidity3pm with a fitted
# straight line (method = "lm").
ggplot(humid_Tomorrowrain, aes(x = Humidity3pm, y = Tomorrow.Rainfall)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).
